FTIR data assessments in collaboration with UQ

Import

# PREPARE
library(phyloseq)
library(ggpubr)       # a handy helper package for ggplots
theme_set(theme_bw())  # setting the theme for ggplots
library(microbiome)
library(tidyverse)
library(DT)
`%notin%` <- Negate(`%in%`)

# MICROBIAL DATA
# reading in a previously saved phyloseq object. This contains microbial 16S-ASV abundances of all samples. 
ps_1C <- readRDS("./data/ps_215samplesJul24")
# ps_1C
#phyloseq-class experiment-level object
#otu_table()   OTU Table:         [ 17961 taxa and 215 samples ]
#sample_data() Sample Data:       [ 215 samples by 48 sample variables ]
#tax_table()   Taxonomy Table:    [ 17961 taxa by 7 taxonomic ranks ]
#phy_tree()    Phylogenetic Tree: [ 17961 tips and 17822 internal nodes ]

### Pre-filtering microbial data
ps.flt  <-  prune_taxa(taxa_sums(ps_1C) >= 10, ps_1C) #m inimum reads per ASV
#phyloseq-class experiment-level object
#otu_table()   OTU Table:         [ 5855 taxa and 215 samples ]
#sample_data() Sample Data:       [ 215 samples by 60 sample variables ]
#tax_table()   Taxonomy Table:    [ 5855 taxa by 7 taxonomic ranks ]
#phy_tree()    Phylogenetic Tree: [ 5855 tips and 5763 internal nodes ]

# MASTERDATA
# change this path to where you store the .csv file. you can provide an absolute path like this. Check the path syntax for Windows as it might be different. 
masterdata <- read.csv("./data/masterdataProject1C_May24.csv")
## Change date to date format in R 
masterdata <-  masterdata %>%
  mutate(Date = lubridate::dmy(as.character(Date)) )
# Make AD, Treatment and SludgeType = factors
masterdata$AD <- factor(masterdata$AD, levels = c("AD1", "AD2","AD3", "AD4","AD5", "AD6", "Full-scale", "PSTWAS"))
masterdata$SludgeType <- factor(masterdata$SludgeType, levels = c("Control", "Treatment", "Foam", "Full-scale", "PSTWAS"))
masterdata$Treatment <- factor(masterdata$Treatment, levels = c("Control", "Treatment", "Full-scale", "PSTWAS"))
masterdata$Period <- factor(masterdata$Period, levels = c("Converging", "SteadyState", "Glycerol", "Inhibition", "Recovery/Feedingpause", "Recovery/Foaming", "Recovery/Postfoam", "SteadyState/Postfoam"))


# LOAD REFLECTANCE DATA 
ATR <- read.csv("./data/ADVATR.csv")[-1,] 
ATR <- ATR %>% rownames_to_column("ID")%>% column_to_rownames("X") %>% dplyr::select(-ID) %>% 
  mutate(across(S537_AD1:S1370_AD6, as.numeric))
# OPTION 1: raw
ATR <-  as.matrix(ATR)
head(ATR) %>% datatable(caption = "ATR raw") 
# OPTION 2: clr transformed
ATRclr <- compositions::clr(ATR)
head(ATRclr) %>% datatable(caption = "ATR clr") 
# CREATE METADATA FOR 24 SELECTED SAMPLES AND MATCH SAMPLE NAMES WITH SPECTRA DF
filtervec <- c("2023-05-01", "2023-05-15", "2023-06-07","2023-06-12")  # 12thJuneDNA = 13th June EPS
filtervec2 <- c("AD1", "AD2", "AD3", "AD4", "AD5","AD6")  # check
metadata <- masterdata %>% 
  dplyr::filter(Date %in% filtervec & 
                  AD %in% filtervec2)
metadata$SampleID.EPS <- colnames(ATR)
metadata <- metadata %>% column_to_rownames("SampleID.EPS")
head(metadata ) %>% datatable(caption = "metadata for 24 selected samples") 
# CREATE A PHYLOSEQ OBJECTS (combined metadata and spectra)
psATR <-phyloseq(
  otu_table(ATR, taxa_are_rows = T), 
  sample_data(metadata)
)

psATRclr <-phyloseq(
  otu_table(ATRclr, taxa_are_rows = T), 
  sample_data(metadata)
)


# ABSORBANCE DATA 
AVE <- read.csv("./data/AVE.csv")[-1,] 
AVE <- AVE %>% rownames_to_column("ID")%>% column_to_rownames("X") %>% dplyr::select(-ID) %>% 
  mutate(across(S537_AD1:S1370_AD6, as.numeric))
# OPTION 1: raw
AVE <-  as.matrix(AVE)
# OPTION 2: clr transformed
AVEclr <- compositions::clr(AVE)
# head(AVEclr)

# CREATE A PHYLOSEQ OBJECTS (combined metadata and spectra)
psAVE <-phyloseq(
  otu_table(AVE, taxa_are_rows = T), 
  sample_data(metadata)
)

psAVEclr <-phyloseq(
  otu_table(AVEclr, taxa_are_rows = T), 
  sample_data(metadata)
)

Gas flow - ind ADs

This is just for you to see variability of performance of individual ADs.

## [1] 2

Microbial 16S (V3V4) PCA

Just to see how microbial 16S abundances vary among the selected samples. We expect treated/inhibited samples (15th May) to be different.

Raw spectra

PCA ATR

library(factoextra)
library(compositions)
my_comparisons <- list(c(1, 2))
symbolsize <- 3

ps.temp <- psATR
pca <- prcomp(otu_table(ps.temp),  scale = FALSE, center = TRUE)
# summary(otu_table(ps.temp))
# head(otu_table(ps.temp))

data <- get_pca(pca)
cord <- data$coord  # extract sample coordinates of PC
cor <- data$cor
cos2 <- data$cos2
contrib <- data$contrib # contributions of variables

# Combine 
df.tmp <- (data.frame(cord) %>% rownames_to_column("ID")) %>% left_join(sample_data(ps.temp) %>% rownames_to_column("ID") ) 
# compare_means(Dim.1 ~ Treatment, method = "t.test", df.tmp) #0.315
# compare_means(Dim.2 ~ Treatment, method = "t.test", df.tmp) #0.908
# compare_means(Dim.3 ~ Treatment, method = "t.test", df.tmp) #0.0075 **
# compare_means(Dim.4 ~ Treatment, method = "t.test", df.tmp) #0.205

Compare groups

T-TESTS, p-values Assessing if there were significant differences in the means of Principal Components (PCs) coordinates between groups (control and treatment)

PCA AVE

library(factoextra)
library(compositions)

ps.temp <- psAVE
# summary(otu_table(ps.temp))
pca <- prcomp(data.frame(otu_table(ps.temp)), scale = FALSE, center = TRUE)
# otu_table(ps.temp)
data <- get_pca(pca)
cord <- data$coord
cor <- data$cor
cos2 <- data$cos2
contrib <- data$contrib

df.tmp <- (data.frame(cord) %>% rownames_to_column("ID")) %>% 
            left_join(sample_data(ps.temp) %>% rownames_to_column("ID") ) 

# compare_means(Dim.1 ~ Treatment, method = "t.test", df.tmp) #0.133
# compare_means(Dim.2 ~ Treatment, method = "t.test", df.tmp) #0.397
# compare_means(Dim.3 ~ Treatment, method = "t.test", df.tmp) #0.026 *
# compare_means(Dim.4 ~ Treatment, method = "t.test", df.tmp) #0.502

Compare groups

T-TESTS, p-values Assessing if there were significant differences in the means of Principal Components (PCs) coordinates between groups (control and treatment)

Comparing ATR and AVE of PC3

Spectra contributions to PC3

Transformed spectra - centred-log ratio

PCA ATR clr

library(factoextra)
library(compositions)

ps.temp <- psATRclr
pca <- prcomp(otu_table(ps.temp),  scale = FALSE, center = FALSE)
# summary(otu_table(ps.temp))
# otu_table(ps.temp)
data <- get_pca(pca)
cord <- data$coord
cor <- data$cor
cos2 <- data$cos2
contrib <- data$contrib


df.tmp <- (data.frame(cord) %>% rownames_to_column("ID")) %>% left_join(sample_data(ps.temp) %>% rownames_to_column("ID") ) 
# compare_means(Dim.1 ~ Treatment, method = "t.test", df.tmp) #0.281
# compare_means(Dim.2 ~ Treatment, method = "t.test", df.tmp) #0.627
# compare_means(Dim.3 ~ Treatment, method = "t.test", df.tmp) #0.00117 **
# compare_means(Dim.4 ~ Treatment, method = "t.test", df.tmp) # 0.518

Compare groups

T-TESTS, p-values Assessing if there were significant differences in the means of Principal Components (PCs) coordinates between groups (control and treatment)

PCA AVE - clr

library(factoextra)
library(compositions)
my_comparisons <- list(c(1, 2))

ps.temp <- psAVEclr
# summary(otu_table(ps.temp))
pca <- prcomp(data.frame(otu_table(ps.temp)), scale = FALSE, center = FALSE)  # FALSE FOR BOTH - CLR data should already be centred and scaled. 
# otu_table(ps.temp)
data <- get_pca(pca)
cord <- data$coord
cor <- data$cor
cos2 <- data$cos2
contrib <- data$contrib
df.tmp <- (data.frame(cord) %>% rownames_to_column("ID")) %>% left_join(sample_data(ps.temp) %>% rownames_to_column("ID") ) 
# compare_means(Dim.1 ~ Treatment, method = "t.test", df.tmp) #0.172
# compare_means(Dim.2 ~ Treatment, method = "t.test", df.tmp) #0.708
# compare_means(Dim.3 ~ Treatment, method = "t.test", df.tmp) #0.0228 *
# compare_means(Dim.4 ~ Treatment, method = "t.test", df.tmp) #0.613

Compare groups

T-TESTS, p-values Assessing if there were significant differences in the means of Principal Components (PCs) coordinates between groups (control and treatment)

Comparing ATR and AVE of PC3

Spectra contributions to PC3

Unsure why the clr-transformed ATR spectra resulted in such different PC3 contributions compared to clr-transformed AVE spectra.

Corplot

This needs further assessments/regression etc as it was done without any consideration to data distribution / validity.

library(ggcorrplot)

# Compute a correlation matrix
df.cor <- df.tmp %>% filter(ID != "S963_AD6") %>%  # filter this sample as it has NAs
  dplyr::select(Dim.1, Dim.2, Dim.3, Dim.4, Dim.5, Gasflow_mL, C, H_pct, N_pct, CN, HC, Ethanol:Hexanoic.acid, ngmL_qb, DNA_mg_gCOD)
corr <- round(cor(df.cor, method = "spearman"), 1) 
p <- ggcorrplot(corr)
p + ggtitle("AVE clr correlations to variables")

#ggsave(plot = p, "./Figures/PC3correlations_clr.png", height=18, width=18, units='cm', dpi=300)